In [8]:
import pandas as pd
import numpy as np
import cv2
import os
import re
import tqdm
from tqdm.auto import tqdm as tqdm
from sklearn.model_selection import KFold

from PIL import Image

import albumentations as albu
from albumentations.pytorch.transforms import ToTensorV2

import torch
from torch.optim.lr_scheduler import StepLR, MultiStepLR
import torchvision

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

from torch.utils.data import DataLoader , Dataset
from torch.utils.data.sampler import SequentialSampler

from matplotlib import pyplot as plt

DIR_INPUT = '/kaggle/input/global-wheat-detection'
DIR_TRAIN = f'{DIR_INPUT}/train'
DIR_TEST = f'{DIR_INPUT}/test'

Metrics

In [9]:
def iou_numpy(outputs: np.ndarray, labels: np.ndarray, smooth: float = 1e-6):
    """Per-sample IoU between binary masks, discretized into tenths above 0.5.

    Args:
        outputs: binary masks of shape (N, 1, H, W); the channel axis is squeezed.
        labels:  binary masks of shape (N, H, W).
        smooth:  small constant that avoids division by zero for empty masks.
                 BUGFIX: the original referenced an undefined global `SMOOTH`,
                 so every call raised NameError.

    Returns:
        Array of shape (N,) with values in {0.0, 0.1, ..., 1.0}: 0 for
        IoU < 0.5, otherwise ceil(20 * (iou - 0.5)) / 10.
    """
    outputs = outputs.squeeze(1)

    # Bitwise AND/OR require boolean or integer masks.
    intersection = (outputs & labels).sum((1, 2))
    union = (outputs | labels).sum((1, 2))

    # smooth makes two empty masks score ~1 instead of raising 0/0.
    iou = (intersection + smooth) / (union + smooth)

    # Map IoU in [0.5, 1.0] onto [0, 10], then discretize to steps of 0.1.
    thresholded = np.ceil(np.clip(20 * (iou - 0.5), 0, 10)) / 10

    return thresholded  # Or thresholded.mean()

def calculate_iou(gt, pr, form='pascal_voc') -> float:
    """Pixel-inclusive IoU of two boxes.

    Boxes are (x1, y1, x2, y2); 'coco' (x, y, w, h) inputs are converted
    to corner format first (inputs are copied, not mutated).
    """
    if form == 'coco':
        gt = gt.copy()
        pr = pr.copy()
        gt[2] = gt[0] + gt[2]
        gt[3] = gt[1] + gt[3]
        pr[2] = pr[0] + pr[2]
        pr[3] = pr[1] + pr[3]

    # Intersection width/height; the +1 counts pixels inclusively.
    inter_w = min(gt[2], pr[2]) - max(gt[0], pr[0]) + 1
    if inter_w < 0:
        return 0.0

    inter_h = min(gt[3], pr[3]) - max(gt[1], pr[1]) + 1
    if inter_h < 0:
        return 0.0

    inter_area = inter_w * inter_h

    # Union = sum of individual areas minus their overlap.
    gt_area = (gt[2] - gt[0] + 1) * (gt[3] - gt[1] + 1)
    pr_area = (pr[2] - pr[0] + 1) * (pr[3] - pr[1] + 1)
    union_area = gt_area + pr_area - inter_area

    return inter_area / union_area

def find_best_match(gts, pred, pred_idx, threshold=0.5, form='pascal_voc', ious=None) -> tuple:
    """Find the ground-truth box that best matches `pred` above `threshold` IoU.

    GT boxes whose first coordinate is negative are treated as already matched
    and skipped. If `ious` (a gts x preds cache initialised to -1) is given,
    cached values >= 0 are reused and fresh IoUs are written back into it.

    Returns:
        (best_gt_index, best_iou). BUGFIX: the annotation previously claimed
        `-> int`, but the function has always returned a 2-tuple; the index
        is -1 (and iou is -inf) when nothing clears the threshold.
    """
    best_match_iou = -np.inf
    best_match_idx = -1

    for gt_idx in range(len(gts)):

        # Skip GT boxes already consumed by an earlier prediction.
        if gts[gt_idx][0] < 0:
            continue

        iou = -1 if ious is None else ious[gt_idx][pred_idx]

        if iou < 0:
            iou = calculate_iou(gts[gt_idx], pred, form=form)

            if ious is not None:
                ious[gt_idx][pred_idx] = iou  # populate the shared cache

        if iou < threshold:
            continue

        if iou > best_match_iou:
            best_match_iou = iou
            best_match_idx = gt_idx

    return best_match_idx, best_match_iou
    
def calculate_image_precision(gts, preds, thresholds=(0.5,), form='coco') -> float:
    """Average the per-image precision over several IoU thresholds.

    One IoU cache is shared across thresholds so each GT/pred pair is
    computed at most once; `gts` is copied per threshold because matching
    marks GT boxes as consumed in place.
    """
    n_threshold = len(thresholds)

    # Cache entries start at -1, meaning "not computed yet".
    ious = np.ones((len(gts), len(preds))) * -1

    image_precision = 0.0
    for threshold in thresholds:
        precision_at_threshold = calculate_precision(
            gts.copy(), preds, threshold=threshold, form=form, ious=ious
        )
        image_precision += precision_at_threshold / n_threshold

    return image_precision

def calculate_precision(gts, preds, threshold=0.5, form='coco', ious=None) -> float:
    """Precision tp / (tp + fp + fn) of predictions at one IoU threshold.

    Args:
        gts:   numpy array of GT boxes (N, 4); matched rows are overwritten
               with -1 in place, so pass a copy if the caller reuses them.
        preds: predicted boxes, ideally sorted by descending confidence.
        ious:  optional (N_gt, N_pred) IoU cache shared across thresholds.

    Returns:
        Precision in [0, 1]; 0.0 when there are no boxes at all.
        BUGFIX: the original raised ZeroDivisionError when tp + fp + fn == 0.
    """
    tp = 0
    fp = 0

    for pred_idx in range(len(preds)):

        best_match_gt_idx, _ = find_best_match(
            gts, preds[pred_idx], pred_idx, threshold=threshold, form=form, ious=ious
        )
        if best_match_gt_idx >= 0:
            tp += 1
            gts[best_match_gt_idx] = -1  # consume the matched GT box
        else:
            fp += 1

    # Unmatched GT rows (not flagged -1) count as false negatives.
    fn = (gts.sum(axis=1) > 0).sum()

    denominator = tp + fp + fn
    return tp / denominator if denominator > 0 else 0.0
    
def collate_fn(batch):
    """Transpose a list of samples into per-field tuples, leaving
    variable-size detection targets un-stacked."""
    fields = zip(*batch)
    return tuple(fields)

class Averager:
    """Running mean of scalar values (used to track the per-epoch loss)."""

    def __init__(self):
        self.current_total = 0.0
        self.iterations = 0.0

    def send(self, value):
        """Fold one value into the running total."""
        self.iterations += 1
        self.current_total += value

    @property
    def value(self):
        """Mean of everything sent so far; 0 before any value arrives."""
        if not self.iterations:
            return 0
        return 1.0 * self.current_total / self.iterations

    def reset(self):
        """Discard all accumulated values."""
        self.current_total = 0.0
        self.iterations = 0.0
In [10]:
BBOXS_DIR = '/kaggle/input/global-wheat-detection/train.csv'
train_df = pd.read_csv(BBOXS_DIR)
In [11]:
# Bounding boxes arrive as strings like "[x, y, w, h]"; default to -1 until parsed.
train_df['x'] = -1
train_df['y'] = -1
train_df['w'] = -1
train_df['h'] = -1

def expand_bbox(x):
    """Extract the four numbers from a bbox string; [-1]*4 when none are found."""
    r = np.array(re.findall("([0-9]+[.]?[0-9]*)", x))
    if len(r) == 0:
        r = [-1, -1, -1, -1]
    return r

train_df[['x', 'y', 'w', 'h']] = np.stack(train_df['bbox'].apply(lambda x: expand_bbox(x)))
train_df.drop(columns=['bbox'], inplace=True)
# BUGFIX: the np.float alias was removed in NumPy 1.24; plain float is equivalent.
train_df['x'] = train_df['x'].astype(float)
train_df['y'] = train_df['y'].astype(float)
train_df['w'] = train_df['w'].astype(float)
train_df['h'] = train_df['h'].astype(float)

Helper functions

In [12]:
def get_random_id():
    """Return one image_id sampled uniformly from the training set."""
    import random
    ids = train_df["image_id"].unique()
    # random.choice is O(1); shuffling the whole array just to take ids[0] was O(n).
    return random.choice(list(ids))

def get_bbox(image_id):
    """All (x, y, w, h) boxes annotated for one training image."""
    rows = train_df.loc[train_df["image_id"] == image_id]
    return rows[["x", "y", "w", "h"]].values

def get_img(image_id):
    """Load a training image as float32 RGB scaled to [0, 1]."""
    path = os.path.join(DIR_TRAIN, f'{image_id}.jpg')
    bgr = cv2.imread(path)
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB).astype(np.float32)
    return rgb / 255.0

def visualize_bbox(img, bbox, color=(255, 0, 0), thickness=2, **kwargs):
    """Draw one (x, y, w, h) box on `img` in place and return it."""
    x_min, y_min, w, h = bbox
    top_left = (int(x_min), int(y_min))
    bottom_right = (int(x_min + w), int(y_min + h))
    cv2.rectangle(img, top_left, bottom_right, color=color, thickness=thickness)
    return img

def augment_and_show(aug ,img_id = None  ):
    """Apply augmentation `aug` to one training image and plot original vs.
    augmented side by side, with bounding boxes drawn on both.

    NOTE(review): get_img() already returns RGB, so the BGR2RGB below swaps
    the channels to BGR; the two conversions further down swap them back for
    display. The round trip appears self-consistent, but confirm the colors
    render correctly. Also `mask=None` is passed to `aug` — presumably
    harmless for these pipelines, but verify with the albumentations version
    in use.
    """
    # Pick a random training image when no id is given.
    if img_id is None:
        img_id = get_random_id()
        img = get_img(img_id)
    else:
        img = get_img(img_id)

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
    boxes =  get_bbox(img_id)   
    labels = np.ones(boxes.shape[0])
    
    # Run the augmentation; labels are required by bbox-aware pipelines.
    augmented = aug(image=img,mask = None ,bboxes=boxes , labels = np.ones((boxes.shape[0],)))
    
    # Convert both images back for matplotlib display.
    img = cv2.cvtColor(img , cv2.COLOR_BGR2RGB)
    img_aug = cv2.cvtColor(augmented['image'] , cv2.COLOR_BGR2RGB)
    
    for bbox in boxes:
        img = visualize_bbox(img, bbox)

    for bbox in augmented['bboxes']:
        img_aug = visualize_bbox(img_aug, bbox)
    
    f, ax = plt.subplots(1, 2, figsize=(16, 8))
        
    ax[0].imshow(img)
    ax[0].set_title('Original image')
        
    ax[1].imshow(img_aug)
    ax[1].set_title('Augmented image')

    f.tight_layout()

Test augmentation

In [13]:
# Photometric demo: gamma shift plus contrast change (no geometry, boxes unchanged).
aug_1 = albu.Compose([albu.RandomGamma(gamma_limit=(120, 180),p=1), albu.RandomContrast((0,0.8),p=1)])
augment_and_show(aug_1)
In [14]:
# Color demo: force the image to grayscale.
aug_2 = albu.Compose([albu.ToGray(p=1)])
augment_and_show(aug_2)
In [15]:
# Color demo: randomly permute the RGB channels.
aug_3 = albu.ChannelShuffle(p=1)
augment_and_show(aug_3)

DataLoader

In [ ]:
class WheatDataset(Dataset):
    """Training dataset yielding (image, target, image_id) triples in the
    format expected by torchvision detection models."""

    def __init__(self, dataframe, image_dir, transforms=None):
        super().__init__()
        self.image_ids = dataframe['image_id'].unique()
        self.df = dataframe
        self.image_dir = image_dir
        self.transforms = transforms

    def __getitem__(self, index: int):

        image_id = self.image_ids[index]
        records = self.df[self.df['image_id'] == image_id]

        image = cv2.imread(f'{self.image_dir}/{image_id}.jpg', cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0

        # (x, y, w, h) -> (x1, y1, x2, y2); astype copies, so the source frame
        # is never mutated through a .values view.
        boxes = records[['x', 'y', 'w', 'h']].values.astype(np.float32)
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3]

        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        area = torch.as_tensor(area, dtype=torch.float32)

        # there is only one class
        labels = torch.ones((records.shape[0],), dtype=torch.int64)

        # suppose all instances are not crowd
        iscrowd = torch.zeros((records.shape[0],), dtype=torch.int64)

        target = {}
        # BUGFIX: boxes previously stayed a numpy array when transforms was
        # None; torchvision models require a tensor.
        target['boxes'] = torch.as_tensor(boxes, dtype=torch.float32)
        target['labels'] = labels
        target['image_id'] = torch.tensor([index])
        target['area'] = area
        target['iscrowd'] = iscrowd

        if self.transforms:
            sample = self.transforms(image=image, bboxes=boxes, labels=labels)
            image = sample['image']

            if len(sample['bboxes']) > 0:
                target['boxes'] = torch.as_tensor(sample['bboxes'], dtype=torch.float32)
            else:
                # BUGFIX: torch.stack crashed when the crop removed every box.
                target['boxes'] = torch.zeros((0, 4), dtype=torch.float32)

        return image, target, image_id

    def __len__(self):
        return self.image_ids.shape[0]

Augmentations

In [ ]:
#Albumentations
def bbox_params_f():
    return albu.BboxParams(format='pascal_voc', min_area=1, min_visibility=0.5 ,label_fields=['labels'] )

def get_train_transform_512():
    """Training pipeline for 512x512 inputs: photometric jitter (p=0.65),
    grayscale or channel shuffle (p=0.4), Cutout, flips, a box-safe random
    crop to 512x512, then tensor conversion. Boxes are pascal_voc corners.

    NOTE(review): the inner Compose declares its own bbox_params in addition
    to the outer one — presumably redundant; confirm against the installed
    albumentations version.
    """
    return albu.Compose([albu.Compose([albu.RandomGamma(gamma_limit=(120, 180),p=1), albu.RandomContrast((0,0.8),p=1) ], bbox_params={'format': 'pascal_voc','label_fields': ['labels']}, p=0.65),
                         albu.OneOf([albu.ToGray(p=1), albu.ChannelShuffle(p=1)], p=0.4),
                         albu.Cutout(num_holes=16, max_h_size=40, max_w_size=40, p=0.5),
                         albu.Flip(p=0.65),
                         #albu.Resize(512, 512, p=1),
                         albu.RandomSizedBBoxSafeCrop(512 , 512, p=1),
                         #albu.OneOf([albu.RandomCrop(512 , 512 ,p=1), albu.CenterCrop(512, 512, p=1)], p=0.75),
                         ToTensorV2(p=1.0)],
                         bbox_params={'format': 'pascal_voc','label_fields': ['labels']})

def get_train_transform_1024():
    """Training pipeline for full 1024x1024 images: same photometric/color
    augmentations and flips as the 512 variant, but without cropping."""
    return albu.Compose([albu.Compose([ albu.RandomGamma(gamma_limit=(120, 180),p=1),albu.RandomContrast((0,0.8),p=1) ], bbox_params= {'format': 'pascal_voc','label_fields': ['labels']}, p=0.65),
                         albu.OneOf([albu.ToGray(p=1),albu.ChannelShuffle(p=1)] , p=0.4),
                         albu.Cutout(num_holes=16, max_h_size=40, max_w_size=40,p=0.5),
                         albu.Flip(p=0.65),
                         ToTensorV2(p=1.0)],
                         bbox_params={'format': 'pascal_voc','label_fields': ['labels']})

def get_valid_transform():
    """Validation pipeline: no augmentation, only HWC float -> CHW tensor."""
    return albu.Compose([ToTensorV2(p=1.0)],
                         bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

Create Models

Resnet 50

In [ ]:
def resnet():
    """COCO-pretrained Faster R-CNN ResNet50-FPN with its box predictor
    swapped for 2 classes (wheat + background)."""
    num_classes = 2
    resnet_model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    # Replace the classification head so it predicts our class count.
    in_features = resnet_model.roi_heads.box_predictor.cls_score.in_features
    resnet_model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return resnet_model

vgg16

In [ ]:
class BoxHead(torch.nn.Module):
    """VGG16 classifier (minus its final layer) reused as the Faster R-CNN
    box head. The `dropout` flag is accepted but currently unused."""

    def __init__(self, vgg, dropout=False):
        super(BoxHead, self).__init__()
        # Drop the last classifier module; keep the rest as the head.
        layers = list(vgg.classifier._modules.values())
        self.classifier = torch.nn.Sequential(*layers[:-1])

    def forward(self, x):
        """Flatten pooled ROI features and run them through the head."""
        flat = x.flatten(start_dim=1)
        return self.classifier(flat)

def vgg_model():
    """Build a Faster R-CNN with an ImageNet-pretrained VGG16 backbone.

    The first 10 feature layers are frozen; the VGG classifier (minus its
    last layer) becomes the box head, feeding a 2-class predictor
    (wheat + background).
    """
    vgg = torchvision.models.vgg16(pretrained=True)
    # Drop the final max-pool so the feature map keeps more resolution.
    backbone = vgg.features[:-1]
    # Freeze early conv layers — generic features, fewer params to train.
    for layer in backbone[:10]:
        for p in layer.parameters():
            p.requires_grad = False
    backbone.out_channels = 512

    box_head = BoxHead(vgg)

    # RPN - Anchor Generator
    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),), aspect_ratios=((0.5, 1.0, 1.5),))

    # Head - Box RoI pooling
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'], output_size=7, sampling_ratio=2)

    # Faster RCNN - Model
    vgg_model = FasterRCNN(
        backbone=backbone,
        rpn_anchor_generator=anchor_generator,
        box_roi_pool=roi_pooler,
        box_head=box_head,
        box_predictor=FastRCNNPredictor(4096, num_classes=2)
    )
    return vgg_model

Train Loop

In [ ]:
def train(epochs, train_data_loader, valid_data_loader, model, optimizer, model_name, lr_scheduler = None ):
    """Train a detection model and evaluate mean IoU / precision each epoch.

    Args:
        epochs: number of epochs to run.
        train_data_loader / valid_data_loader: loaders yielding
            (images, targets, image_ids) batches (see collate_fn).
        model: torchvision-style detector — returns a loss dict in train
            mode and per-image prediction dicts in eval mode.
        optimizer: optimizer over the model's trainable parameters.
        model_name: basename for the checkpoint saved to /kaggle/working.
        lr_scheduler: optional scheduler, stepped once per epoch.

    Returns:
        (train_losses, valid_precision_lst, valid_iou_lst), one entry per epoch.
    """
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)
    loss_hist = Averager()
    train_losses, valid_precision_lst, valid_iou_lst = [], [], []

    for epoch in range(epochs):

        # ---------------- training ----------------
        loss_hist.reset()
        model.train()

        for images, targets, image_ids in tqdm(train_data_loader, desc='train'):

            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            # In train mode the detector returns a dict of component losses.
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())
            loss_hist.send(losses.item())

            optimizer.zero_grad()
            losses.backward()
            optimizer.step()

            del images, targets, loss_dict, image_ids  # release GPU memory early

        print(f"Epoch #{epoch+1} loss: {loss_hist.value}")
        train_losses.append(loss_hist.value)

        # ---------------- validation ----------------
        model.eval()
        with torch.no_grad():

            validation_precisions = []
            validation_iou = []
            iou_thresholds = [x for x in np.arange(0.5, 0.76, 0.05)]
            batch = 0

            for images, targets, image_ids in tqdm(valid_data_loader, desc='validation'):
                batch += 1
                images = [image.to(device) for image in images]
                targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
                outputs = model(images)

                # Per-image statistics for this batch.
                for i, image in enumerate(images):

                    boxes = outputs[i]['boxes'].data.cpu().numpy()
                    scores = outputs[i]['scores'].data.cpu().numpy()
                    gt_boxes = targets[i]['boxes'].cpu().numpy()

                    # Greedy matching: each GT box may be consumed at most once.
                    gts = gt_boxes.copy()
                    iou_single_img = 0
                    num_overlaps = 0
                    for pred_idx in range(len(boxes)):

                        best_match_gt_idx, best_match_iou = find_best_match(gts, boxes[pred_idx], pred_idx)

                        if best_match_gt_idx >= 0:
                            gts[best_match_gt_idx] = -1  # mark matched GT as consumed

                        if best_match_iou >= 0:
                            num_overlaps += 1
                            iou_single_img += best_match_iou

                    # Mean IoU over matched predictions for this image.
                    # BUGFIX: a bare `except:` previously masked the zero case.
                    if num_overlaps > 0:
                        validation_iou.append(iou_single_img / num_overlaps)
                    else:
                        print(f"zero division : iou {iou_single_img}, overlaps {num_overlaps}")

                    # Precision requires predictions sorted by descending score.
                    preds_sorted_idx = np.argsort(scores)[::-1]
                    preds_sorted = boxes[preds_sorted_idx]
                    # BUGFIX: arguments were swapped (predictions passed as the
                    # ground truth), and the boxes are corner-format, so the
                    # form must be 'pascal_voc' rather than 'coco'.
                    image_precision = calculate_image_precision(gt_boxes, preds_sorted, thresholds=iou_thresholds, form='pascal_voc')
                    validation_precisions.append(image_precision)

                print(f"stats for batch {batch} ,iou:{np.mean(validation_iou)} ,precision:{ np.mean(validation_precisions)}")

            # Store epoch-level statistics.
            valid_prec = np.mean(validation_precisions)
            valid_precision_lst.append(valid_prec)

            valid_iou = np.mean(validation_iou)
            valid_iou_lst.append(valid_iou)

        print(f"Epoch #{epoch+1} , precision : {valid_prec} , iou:{valid_iou}")

        # Step the learning-rate schedule once per epoch.
        if lr_scheduler is not None:
            lr_scheduler.step()

    torch.save(model.state_dict(), f"/kaggle/working/{model_name}.pth")
    return train_losses, valid_precision_lst, valid_iou_lst

K-Fold Models

In [ ]:
# 5-fold training of the ResNet50 detector.
# NOTE(review): `j` is reassigned to 4 on every iteration, so the `j > 4`
# break never fires and every fold uses the 512 transform with batch size 16.
# Preserved as-is, but the dead branches look like leftover experiment toggles.
result_dict = {}
skf = KFold(n_splits=5, random_state=None, shuffle=True)
image_ids = train_df['image_id'].unique()
j = 2
for i, (train_idx, valid_idx) in enumerate(skf.split(image_ids)):
    j = 4
    if j > 4:
        break

    model = resnet()
    params = [p for p in model.parameters() if p.requires_grad]

    # Split at the image level so boxes of one image never leak across folds.
    train_fold = [image_ids[idx] for idx in train_idx]
    train_set = train_df[train_df['image_id'].isin(train_fold)]

    valid_fold = [image_ids[idx] for idx in valid_idx]
    valid_set = train_df[train_df['image_id'].isin(valid_fold)]

    if j % 2 == 0:
        train_dataset = WheatDataset(train_set, DIR_TRAIN, get_train_transform_512())
    else:
        train_dataset = WheatDataset(train_set, DIR_TRAIN, get_train_transform_1024())

    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

    if j == 4:
        batch_size = 16
    else:
        batch_size = 8

    valid_dataset = WheatDataset(valid_set, DIR_TRAIN, get_valid_transform())
    lr_scheduler = MultiStepLR(optimizer, [3, 8])

    train_data_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=4,
        collate_fn=collate_fn)

    valid_data_loader = DataLoader(
        valid_dataset,
        batch_size=8,
        shuffle=False,
        num_workers=4,
        collate_fn=collate_fn)

    # BUGFIX: the scheduler was created but never handed to train(), so the
    # learning rate never decayed.
    tl, vp, vi = train(15, train_data_loader, valid_data_loader, model, optimizer,
                       f"resnet50Model{i+1}", lr_scheduler=lr_scheduler)

    # BUGFIX: every fold previously overwrote the same "train_stats.txt", and
    # two separators were the literal "/n" instead of a newline, which glued
    # the precision values and the iou header onto one line and broke the
    # parser used later.
    with open(f"train_stats_fold{i+1}.txt", "w") as fp:
        fp.write("Train_loss: \n")
        for line in tl:
            fp.write(str(line) + " ")
        fp.write("\n")
        fp.write("Validation precision: \n")
        for line in vp:
            fp.write(str(line) + " ")
        fp.write("\n")
        fp.write("Validation iou: \n")
        for line in vi:
            fp.write(str(line) + " ")
        fp.write("\n")
    result_dict[f"{i}"] = [tl, vp, vi]
In [16]:
# The sample submission lists the test image ids (one row per image).
test_df = pd.read_csv(f'{DIR_INPUT}/sample_submission.csv')
In [17]:
# Parse the saved per-epoch stats: headers on even lines, space-separated
# values on odd lines (the trailing split token is the newline, hence [:-1]).
# BUGFIX: the file handle was never closed — use a context manager.
with open("../input/resnetmodels/resnet_train_stats.txt", 'r') as f:
    lines = f.readlines()
train_loss = [float(val) for val in lines[1].split(" ")[:-1]]
valid_precision = [float(val) for val in lines[3].split(" ")[:-1]]
valid_iou = [float(val) for val in lines[5].split(" ")[:-1]]
In [19]:
# Plot the parsed training curves side by side: loss, precision, IoU.
plt.figure(figsize = [15,15])
plt.subplot(1,3,1)
plt.title("train loss")
plt.plot(train_loss)
plt.subplot(1,3,2)
plt.title("validation precision")
plt.plot(valid_precision)
plt.subplot(1,3,3)
plt.title("validation iou")
plt.plot(valid_iou)
plt.show()

Test DataLoader

In [ ]:
class WheatTestDataset(Dataset):
    """Inference dataset: yields (image, image_id) pairs with no targets."""

    def __init__(self, dataframe, image_dir, transforms=None):
        super().__init__()

        self.image_ids = dataframe['image_id'].unique()
        self.df = dataframe
        self.image_dir = image_dir
        self.transforms = transforms

    def __getitem__(self, index: int):

        image_id = self.image_ids[index]
        records = self.df[self.df['image_id'] == image_id]  # unused, kept from original

        # Read as BGR, convert to RGB float32 in [0, 1].
        path = f'{self.image_dir}/{image_id}.jpg'
        image = cv2.cvtColor(cv2.imread(path, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB)
        image = image.astype(np.float32)
        image /= 255.0

        if self.transforms:
            image = self.transforms(image=image)['image']

        return image, image_id

    def __len__(self) -> int:
        return self.image_ids.shape[0]
In [ ]:
def get_test_transform():
    """Inference pipeline: only HWC float image -> CHW tensor."""
    transforms = [ToTensorV2(p=1.0)]
    return albu.Compose(transforms)

Vgg16 model

In [ ]:
class BoxHead(torch.nn.Module):
    """Box head built from the VGG16 classifier without its final layer.
    NOTE(review): this redefines the BoxHead class from the training section;
    the `dropout` flag is accepted but unused in both definitions."""

    def __init__(self, vgg, dropout=False):
        super(BoxHead, self).__init__()
        modules = list(vgg.classifier._modules.values())
        self.classifier = torch.nn.Sequential(*modules[:-1])

    def forward(self, x):
        """Flatten pooled ROI features, then apply the truncated classifier."""
        return self.classifier(x.flatten(start_dim=1))

def load_vgg16_net(checkpoint_path, device=None):
    """Rebuild the VGG16 Faster R-CNN and load trained weights for inference.

    Args:
        checkpoint_path: path to a state_dict saved during training.
        device: target torch device; defaults to CUDA for backward
            compatibility with the original hard-coded .cuda(). BUGFIX: the
            function is called with (path, device) later in the notebook but
            previously accepted only one argument, raising TypeError.

    Returns:
        The model in eval mode on `device`.
    """
    if device is None:
        device = torch.device('cuda')
    vgg = torchvision.models.vgg16(pretrained=False)
    backbone = vgg.features[:-1]  # drop the final max-pool
    # Freeze the first conv blocks, mirroring the training-time setup.
    for layer in backbone[:10]:
        for p in layer.parameters():
            p.requires_grad = False
    backbone.out_channels = 512
    box_head = BoxHead(vgg)
    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),), aspect_ratios=((0.5, 1.0, 1.5),))
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'], output_size=7, sampling_ratio=2)
    vgg_model = FasterRCNN(
            backbone=backbone,
            rpn_anchor_generator=anchor_generator,
            box_roi_pool=roi_pooler,
            box_head=box_head,
            box_predictor=FastRCNNPredictor(4096, num_classes=2)
        )
    # BUGFIX: map_location lets a CUDA-saved checkpoint load on any device.
    vgg_model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    vgg_model = vgg_model.to(device)
    vgg_model.eval()
    return vgg_model

resnet 50 model

In [ ]:
def load_resnet(checkpoint_path, device):
    """Rebuild the Faster R-CNN ResNet50-FPN and load trained weights.

    Args:
        checkpoint_path: state_dict saved during training.
        device: torch device the model is moved to.

    Returns:
        The model in eval mode on `device`.
    """
    resnet = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False, pretrained_backbone=False)
    num_classes = 2  # wheat + background
    in_features = resnet.roi_heads.box_predictor.cls_score.in_features
    resnet.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # BUGFIX: without map_location, a CUDA-saved checkpoint fails to load on
    # a CPU-only machine.
    resnet.load_state_dict(torch.load(checkpoint_path, map_location=device))
    resnet = resnet.to(device)
    resnet.eval()
    return resnet
In [ ]:
# Build the test-time dataset and loader from the sample submission's ids.
test_df = pd.read_csv(f'{DIR_INPUT}/sample_submission.csv')
test_dataset = WheatTestDataset(test_df, DIR_TEST, get_test_transform())

test_data_loader = DataLoader(
    test_dataset,
    batch_size=4,
    shuffle=False,
    num_workers=4,
    drop_last=False,
    collate_fn=collate_fn
)

# Use the GPU when available; this global is read by the inference helpers.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

WBF and Ensemble Methods

In [ ]:
!pip install ensemble-boxes
In [ ]:
from ensemble_boxes import *


def make_ensemble_predictions(images, models):
    """Run every model in `models` on one batch; returns one output list per model.

    Uses the module-level `device`. BUGFIX: wrapped in torch.no_grad() so
    inference does not build autograd graphs and leak GPU memory.
    """
    images = list(image.to(device) for image in images)
    result = []
    with torch.no_grad():
        for net in models:
            outputs = net(images)
            result.append(outputs)
    return result

def run_wbf(predictions, image_index, image_size=512, iou_thr=0.55, skip_box_thr=0.7, weights=None):
    """Fuse per-model predictions for one image with Weighted Boxes Fusion.

    Args:
        predictions: list (one entry per model) of per-image output dicts.
        image_index: which image of the batch to fuse.
        image_size: side length used to normalise boxes into [0, 1] for WBF.
        iou_thr: IoU above which boxes from different models are merged.
        skip_box_thr: confidence below which boxes are discarded.
        weights: optional per-model weights. BUGFIX: this parameter was
            previously ignored because weights=None was hard-coded in the
            fusion call.

    Returns:
        (boxes, scores, labels) with boxes scaled back to pixel coordinates.
    """
    boxes = [prediction[image_index]['boxes'].data.cpu().numpy()/(image_size-1) for prediction in predictions]
    scores = [prediction[image_index]['scores'].data.cpu().numpy() for prediction in predictions]
    labels = [np.ones(prediction[image_index]['scores'].shape[0]) for prediction in predictions]
    boxes, scores, labels = weighted_boxes_fusion(boxes, scores, labels, weights=weights, iou_thr=iou_thr, skip_box_thr=skip_box_thr)
    boxes = boxes*(image_size-1)
    return boxes, scores, labels
In [ ]:
# Load the five ResNet fold checkpoints for the WBF ensemble.
# NOTE(review): "renet_models" looks like a typo for "resnet_models", but
# later cells reference it by this name, so it is kept.
renet_models = [ load_resnet('../input/resnetmodels/resnet50Model1.pth',device), 
                 load_resnet('../input/resnetmodels/resnet50Model2.pth',device), 
                 load_resnet('../input/resnetmodels/resnet50Model3.pth',device), 
                 load_resnet('../input/resnetmodels/resnet50Model4.pth',device), 
                 load_resnet('../input/resnetmodels/resnet50Model5.pth',device)
               ]
In [ ]:
# Load the VGG16 fold checkpoints.
# NOTE(review): filenames vgg16Model12..15 look like typos for 2..5, and
# load_vgg16_net is called with (path, device) although its definition above
# accepts only one argument — confirm both before running this cell.
vgg_models = [ load_vgg16_net('../input/vggmodels/vgg16Model1.pth', device), 
               load_vgg16_net('../input/vggmodels/vgg16Model12.pth', device), 
               load_vgg16_net('../input/vggmodels/vgg16Model13.pth', device), 
               load_vgg16_net('../input/vggmodels/vgg16Model14.pth', device), 
               load_vgg16_net('../input/vggmodels/vgg16Model15.pth', device)
             ]

show sample

In [ ]:
import matplotlib.pyplot as plt

# Grab one batch from the test loader. Note: the for-statement binds
# images/image_ids BEFORE the break, so this keeps the second batch (j=1).
for j, (images, image_ids) in enumerate(test_data_loader):
    if j > 0:
        break
predictions = make_ensemble_predictions(images, renet_models)

# Fuse and draw the ensemble boxes for one image of the batch.
i = 1
sample = images[i].permute(1,2,0).cpu().numpy()
boxes, scores, labels = run_wbf(predictions, image_index=i)
boxes = boxes.astype(np.int32).clip(min=0, max=511)

fig, ax = plt.subplots(1, 1, figsize=(16, 8))

for box in boxes:
    cv2.rectangle(sample,
                  (box[0], box[1]),
                  (box[2], box[3]),
                  (220, 0, 0), 2)
    
ax.set_axis_off()
ax.imshow(sample);

Inference

In [ ]:
def format_prediction_string(boxes, scores):
    """Render (score, box) pairs as the competition submission string:
    'score x y w h score x y w h ...' with scores to four decimals."""
    parts = [
        "{0:.4f} {1} {2} {3} {4}".format(score, box[0], box[1], box[2], box[3])
        for score, box in zip(scores, boxes)
    ]
    return " ".join(parts)

results = []

# Run the ensemble over the whole test set and collect submission rows.
for images, image_ids in test_data_loader:
    predictions = make_ensemble_predictions(images, renet_models)
    for i, image in enumerate(images):
        boxes, scores, labels = run_wbf(predictions, image_index=i)
        # Scale from the 512-based WBF frame up to 1024px coordinates —
        # presumably the test images are 1024x1024; confirm against the data.
        boxes = (boxes*2).astype(np.int32).clip(min=0, max=1023)
        image_id = image_ids[i]

        # Convert corners (x1, y1, x2, y2) to the submission's (x, y, w, h).
        boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
        boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
        
        result = {
            'image_id': image_id,
            'PredictionString': format_prediction_string(boxes, scores)
        }
        results.append(result)
In [ ]:
# Assemble the submission frame in the required column order.
test_df = pd.DataFrame(results, columns=['image_id', 'PredictionString'])
In [ ]:
test_df.to_csv('submission.csv', index=False)  # final Kaggle submission file
In [ ]: